
*********************************************************************************************************
* Figure A.4: Month of birth and age at first observed vote
*********************************************************************************************************

* Load the vote extract. A forward slash is used as the directory separator:
* Stata accepts it on every platform (a backslash only works on Windows and
* can be swallowed as an escape character in front of a macro), and it matches
* the export path used for the figure at the end of this file.
use "Data_temp/bhps_US_vote_extract.dta", clear

**************************************************************************
* Basic editing
**************************************************************************

* Birth date (monthly %tm value) built from birth year and birth month
gen birth_date=ym(birthy,birthm)
format birth_date %tm
lab var birth_date "Birth date (month and year)"
* Keep cohorts born 1970m4-2000m1; inrange() is false for a missing
* birth_date, so observations without a valid birth date are dropped as well.
keep if inrange(birth_date,tm(1970m4),tm(2000m1))

* Interview date (monthly %tm value): use doim when it yields a valid date,
* otherwise fall back on intm
gen interview_date = cond(ym(year, doim) < ., ym(year, doim), ym(year, intm))
format %tm interview_date
label variable interview_date "Interview date (month and year)"

* Age at interview: months between birth and interview, expressed in years
gen age = (interview_date - birth_date) / 12
label variable age "Age at interview in years"

* Lowest and highest age at which each person (pidp) is observed
bysort pidp: egen min_age = min(age)
bysort pidp: egen max_age = max(age)


* Election dates: one monthly (%tm) constant election<year> per election
foreach when in 1992m4 1997m5 2001m6 2005m5 2010m5 2015m5 2017m6 {
	local yr = substr("`when'", 1, 4)
	gen election`yr' = tm(`when')
	format election`yr' %tm
}

* Mark election cycle in which interview was held: an interview belongs to
* election E when it takes place in or after E's month and before the next
* election's month. The last cycle (2017) is open-ended but still requires a
* non-missing interview date.
local elections 1992 1997 2001 2005 2010 2015 2017
local n_elections : word count `elections'
gen election = .
forvalues i = 1/`n_elections' {
	local cur : word `i' of `elections'
	local upper "."
	if `i' < `n_elections' {
		local nxt : word `=`i'+1' of `elections'
		local upper "election`nxt'"
	}
	replace election = `cur' if interview_date >= election`cur' & interview_date < `upper'
}
tabulate election

* Age at election (years, rounded to 2 decimals); only filled in for the
* election whose cycle the interview falls into
foreach yr of local elections {
	gen age`yr' = round((election`yr' - birth_date) / 12, .01) if election == `yr'
}
summarize age*, detail
	
**************************************************************************
* Attribute a vote report (vote7==1) to a specific election: the interview
* must fall into that election's cycle (election==E, i.e. on/after E and
* before the following election) and lie no more than 24 months after E.
foreach e in 1992 1997 2001 2005 2010 2015 2017 {
	dis `e'
	gen voted`e' = 1 if vote7 == 1 & election == `e' & (interview_date - election`e') <= 24
}

* Row total over the voted<year> flags; at most one flag can be set per row
* because the election cycles do not overlap
egen vote_help = rowtotal(voted*)

* Identify first vote of an individual in the data.
* Sorting on (voted wave) within person puts rows with voted==1 first
* (missing sorts last), ordered by wave, so _n==1 is the earliest wave with
* an attributed vote whenever the person has one.
gen voted = 1 if vote_help == 1
bysort pidp (voted wave): gen first_vote = 1 if _n == 1 & vote7 == 1 & voted == 1
* Stricter measure: first vote and vote8>0 (original note: specific party indicated)
by pidp (voted wave): gen first_vote_party = 1 if _n == 1 & vote7 == 1 & voted == 1 & vote8 > 0
summarize first_vote*

* Age (years, 2 decimals) at the election to which the first vote is attributed
gen first_age = .
foreach yr of numlist 1992 1997 2001 2005 2010 2015 2017 {
	replace first_age = round((election`yr' - birth_date) / 12, .01) if first_vote == 1 & election == `yr'
}
summarize first_age, detail // < 1% unrealistic cases



*************************************************************************
* Sample restrictions for the figure

* Born after 1992m5: these were only eligible to vote in 2015, making their
* voting deterministic (a missing birth_date would be removed here as well)
keep if birth_date <= tm(1992m5)

* Discard interviews from the 2017 election cycle (rows without a cycle stay)
keep if election != 2017

summarize age* if voted == 1

* Age at the attributed election, only for rows with an attributed vote
gen elec_age = .
foreach yr of numlist 1992 1997 2001 2005 2010 2015 2017 {
	replace elec_age = age`yr' if voted == 1 & election == `yr' & missing(elec_age)
}
summarize elec_age if !missing(first_age), detail

* Keep attributed votes cast at age 27 or younger; missing counts as larger
* than 27 in Stata, so rows without elec_age are removed too
drop if elec_age > 27

* Remove first votes recorded at age 18.00 or below; rows with a missing
* first_age are retained (missing compares as greater than 18)
keep if first_age > 18

	
* One row per birth month: mean age at first observed vote (first_age) and
* the number of non-missing first_age values behind that mean (N)
collapse first_age  (count) N=first_age, by(birth_date)

sort birth_date
* Centred three-row average of first_age. It is missing on the first and
* last row and wherever a neighbouring value is missing; those rows fall
* back on their own value. The variable name is written out in full so the
* fallback does not depend on variable abbreviation being enabled.
* NOTE(review): neighbours are adjacent rows, which are adjacent calendar
* months only if no birth month is absent from the collapsed data - confirm.
gen roll_average=(first_age+first_age[_n+1]+first_age[_n-1])/3
replace roll_average=first_age if roll_average==.

* Reference line: anchor values placed at the birth months around each
* election cut-off (presumably the approximate age at the first election a
* cohort could vote in - confirm). Birth months after 1992m5 are dropped
* earlier in this file, so the anchors beyond that date match no rows here.
gen theo=22 if birth_date==tm(1970m4) | birth_date==tm(1979m6) | birth_date==tm(1983m7)
replace theo=18 if birth_date==tm(1974m4) | birth_date==tm(1979m5) | birth_date==tm(1983m6) | birth_date==tm(1987m5) | birth_date==tm(1992m5) | birth_date==tm(1997m5)
replace theo=23 if birth_date==tm(1974m5) | birth_date==tm(1987m6) | birth_date==tm(1992m6)

* Scatter of mean first-vote age by birth month plus the dashed reference line.
* Author's note: unstandardized patterns look odd already....
* The x positions are %tm month numbers:
* 171=1974m4, 232=1979m5, 281=1983m6, 328=1987m5.
twoway (scatter first_age birth_date, msize(small) color(edkblue) msymbol(Oh)) ///
	(line theo birth_date, lcolor(gs10) lpattern(dash)), ///
	xline(171) xline(232) xline(281) xline(328) ///
	xlab(171 "4-1974" 232 "5-1979" 281 "6-1983" 328 "5-1987" ) ///
	yline(18, lcolor(red)) ylab(16(2)28) xti("Month of birth") yti("Age at first observed vote") legend(off) ///
	text(26.5 145 "Election:") text(26.5 180 "1992") text(26.5 241 "1997") text(26.5 290 "2001") text(26.5 337 "2005") ///
	graphregion(margin(zero))
graph export "Figures/a_fg4.eps", replace




